Code
# Set global knitr chunk options: echo code, suppress warnings and messages,
# centre figures, and use a retina scaling factor of 2 for figures.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE,
  fig.align = "center",
  fig.retina = 2
)
# NOTE: the workspace is intentionally not cleared with rm(list = ls()).
# Wiping the global environment from inside a script silently destroys the
# caller's objects and does not reset loaded packages or options; restart R
# (or render in a fresh session) when a clean state is required.
library (tinytex)
Warning: package 'tinytex' was built under R version 4.5.2
Code
library (ggplot2)
#library(table1)
library (gt)
library (survival)
library (data.table)
library (randomForest)
library (grf)
library (policytree)
library (DiagrammeR)
#library(grid)
#library(forestploter)
#library(randomizr)
# library(devtools)
# install_github("larry-leon/weightedsurv", force = TRUE)
#install.packages("weightedsurv")
# install_github("larry-leon/forestsearch", force = TRUE)
library (forestsearch)
library (weightedsurv)
# Set theme for plots: make theme_minimal() with a base font size of 12 the
# default ggplot2 theme for the rest of the document
theme_set (theme_minimal (base_size = 12 ))
Summary
Reproducing main GBSG analysis
Data setup
Code
# Build the analysis data set from `gbsg` and add derived columns.
df.analysis <- gbsg
df.analysis <- within(df.analysis, {
  # Sequential subject id; seq_len() (unlike 1:nrow()) is also correct when
  # the data frame has zero rows
  id <- as.numeric(seq_len(nrow(df.analysis)))
  # time to months (30.4375 = 365.25 / 12; assumes rfstime is in days)
  time_months <- rfstime / 30.4375
  # 1/0 indicator for grade 3
  grade3 <- ifelse(grade == "3", 1, 0)
  # Copy of the treatment column
  treat <- hormon
})

# Column names used by the analyses below
confounders.name <- c("age", "meno", "size", "grade3", "nodes", "pgr", "er")
outcome.name <- "time_months"
event.name <- "status"
id.name <- "id"
treat.name <- "hormon"
Kaplan-Meier curves and baseline summary
Code
# Counting-process data set for the Kaplan-Meier display, with by.risk = 6
dfcount <- df_counting(
  df = df.analysis,
  tte.name = outcome.name,
  event.name = event.name,
  treat.name = treat.name,
  by.risk = 6
)

# Weighted Kaplan-Meier curves with confidence intervals and log-rank result
plot_weighted_km(
  dfcount,
  conf.int = TRUE,
  show.logrank = TRUE,
  ymax = 1.05,
  xmed.fraction = 0.775,
  ymed.offset = 0.125
)
Code
# Baseline characteristics summarised by treatment arm
create_summary_table(
  data = df.analysis,
  treat_var = treat.name,
  table_title = "GBSG Characteristics by Treatment Arm",
  vars_continuous = c("age", "nodes", "size", "er", "pgr"),
  vars_categorical = c("grade", "grade3"),
  font_size = 12
)
Characteristic
Control (n=440)
Treatment (n=246)
P-value
SMD
age
Mean (SD)
51.1 (10.0)
56.6 (9.4)
<0.001
0.57
nodes
Mean (SD)
4.9 (5.6)
5.1 (5.3)
0.665
0.03
size
Mean (SD)
29.6 (14.4)
28.8 (14.1)
0.470
0.06
er
Mean (SD)
79.7 (124.2)
125.8 (191.1)
<0.001
0.30
pgr
Mean (SD)
102.0 (170.0)
124.3 (249.7)
0.213
0.11
grade
0.273
0.06
1
48 (10.9%)
33 (13.4%)
2
281 (63.9%)
163 (66.3%)
3
111 (25.2%)
50 (20.3%)
grade3
0.174
0.05
0
329 (74.8%)
196 (79.7%)
1
111 (25.2%)
50 (20.3%)
GRF analysis
Code
## GRF
# GRF-based subgroup analysis on the analysis data; details = TRUE prints the
# leaf summary and the selected splits
grf_est1 <- grf.subg.harm.survival(
  data = df.analysis,
  confounders.name = confounders.name,
  outcome.name = outcome.name,
  event.name = event.name,
  id.name = id.name,
  treat.name = treat.name,
  maxdepth = 2,
  n.min = 60,
  dmin.grf = 12,
  frac.tau = 0.6,
  details = TRUE
)
tau, maxdepth = 46.75811 2
leaf.node control.mean control.size control.se depth
1 2 6.49 82.00 3.34 1
2 3 -4.10 604.00 1.06 1
11 4 -7.90 112.00 2.81 2
21 5 3.86 177.00 1.87 2
4 7 -5.89 356.00 1.33 2
Selected subgroup:
leaf.node control.mean control.size control.se depth
1 2 6.49 82.00 3.34 1
GRF subgroup found
Terminating node at max.diff (sg.harm.id):
[1] "er <= 0"
All splits:
[1] "er <= 0" "age <= 50" "age <= 43"
Code
# NOTE: In general for GRF trees
#   leaf1 --> recommend control
#   leaf2 --> recommend treatment
# Tree depth 1
plot(
  grf_est1$tree1,
  leaf.labels = c("Control", "Treat")
)
Code
# Tree depth 2: plot the depth-2 tree stored in grf_est1, labelling the two
# leaf types "Control" and "Treat"
plot (grf_est1$ tree2,leaf.labels= c ("Control" ,"Treat" ))
Forestsearch with depth=2 (maxk = 2)
Code
# Setup parallel processing: register the doFuture and doRNG backends
library(doFuture)
library(doRNG)
registerDoFuture()
registerDoRNG()

# Run the ForestSearch subgroup search and report its run time.
# The outcome/treatment/event/id column names come from the shared *.name
# variables (same values as before) so this call cannot drift from the GRF
# analysis, which already uses them.
system.time({
  fs <- forestsearch(
    df.analysis,
    confounders.name = confounders.name,
    outcome.name = outcome.name,
    treat.name = treat.name,
    event.name = event.name,
    id.name = id.name,
    potentialOutcome.name = NULL,
    df.test = NULL,
    flag_harm.name = NULL,
    hr.threshold = 1.25,
    hr.consistency = 1.0,
    pconsistency.threshold = 0.90,
    sg_focus = "hr",
    max_subgroups_search = 30,
    use_twostage = TRUE,
    showten_subgroups = TRUE,
    details = TRUE,
    conf_force = NULL,
    cut_type = "default",
    use_grf = TRUE,
    plot.grf = TRUE,
    use_lasso = TRUE,
    maxk = 2,
    fs.splits = 1000,
    n.min = 60,
    d0.min = 10,
    d1.min = 10,
    plot.sg = TRUE,
    by.risk = 6,
    parallel_args = list(plan = "callr", workers = 30, show_message = TRUE)
  )
})
=== Two-Stage Consistency Evaluation Enabled ===
Stage 1 screening splits: 30
Maximum total splits: 1000
Batch size: 20
================================================
GRF stage for cut selection with dmin, tau = 12 0.6
tau, maxdepth = 46.75811 2
leaf.node control.mean control.size control.se depth
1 2 6.49 82.00 3.34 1
2 3 -4.10 604.00 1.06 1
11 4 -7.90 112.00 2.81 2
21 5 3.86 177.00 1.87 2
4 7 -5.89 356.00 1.33 2
Selected subgroup:
leaf.node control.mean control.size control.se depth
1 2 6.49 82.00 3.34 1
GRF subgroup found
Terminating node at max.diff (sg.harm.id):
[1] "er <= 0"
All splits:
[1] "er <= 0" "age <= 50" "age <= 43"
GRF cuts identified: 3
Cuts: er <= 0, age <= 50, age <= 43
# of continuous/categorical characteristics 5 2
Continuous characteristics: age size nodes pgr er
Categorical characteristics: meno grade3
## Prior to lasso: age size nodes pgr er
#### Lasso selection results
7 x 1 sparse Matrix of class "dgCMatrix"
s0
age .
meno .
size 0.005433435
grade3 0.178139021
nodes 0.049670523
pgr -0.001812895
er .
Cox-LASSO selected: size grade3 nodes pgr
Cox-LASSO not selected: age meno er
### End Lasso selection
## After lasso: size nodes pgr
Default cuts included from Lasso: size <= mean(size) size <= median(size) size <= qlow(size) size <= qhigh(size) nodes <= mean(nodes) nodes <= median(nodes) nodes <= qlow(nodes) nodes <= qhigh(nodes) pgr <= mean(pgr) pgr <= median(pgr) pgr <= qlow(pgr) pgr <= qhigh(pgr)
Categorical after Lasso: grade3
Factors per GRF: er <= 0 age <= 50 age <= 43
Initial GRF cuts included er <= 0 age <= 50 age <= 43
Factors included per GRF (not in lasso) er <= 0 age <= 50 age <= 43
===== CONSOLIDATED CUT EVALUATION (IMPROVED) =====
Evaluating 16 cut expressions once and caching...
Cut evaluation summary:
Total cuts: 16
Valid cuts: 16
Errors: 0
✓ All 16 factors validated as 0/1
===== END CONSOLIDATED CUT EVALUATION =====
# of candidate subgroup factors= 16
[1] "er <= 0" "age <= 50" "age <= 43" "size <= 29.3" "size <= 25"
[6] "size <= 20" "size <= 35" "nodes <= 5" "nodes <= 3" "nodes <= 1"
[11] "nodes <= 7" "pgr <= 110" "pgr <= 32.5" "pgr <= 7" "pgr <= 131.8"
[16] "grade3"
Number of possible configurations (<= maxk): maxk = 2 , # combinations = 528
Events criteria: control >= 10 , treatment >= 10
Subgroup search completed in 0.01 minutes
Found 13 subgroup candidate(s)
# of candidate subgroups (meeting all criteria) = 13
# of unique initial candidates: 13
# Restricting to top stop_Kgroups = 30
# of candidates to evaluate: 13
Algorithm: Two-stage sequential
Stage 1 splits: 30
Screen threshold: 0.763
Max total splits: 1000
Batch size: 20
Parallel processing: callr with 30 workers
*** Subgroup found: {er <= 0} {size <= 35}
% consistency criteria met= 1
SG focus= hr
Subgroup Consistency Minutes= 0.046
Algorithm used: Two-stage sequential
Candidates evaluated: 13
Candidates passed: 7
Subgroup found (FS) with sg_focus='hr'
Selected subgroup: {er <= 0} & {size <= 35}
Minutes forestsearch overall = 0.06
Consistency algorithm used: twostage
user system elapsed
19.568 1.593 3.791
Code
# Switch the future plan back to sequential after the parallel search
plan("sequential")

# Results for estimation (training) data, which_df = "est" is default
res_tabs <- sg_tables(
  fs,
  ndecimals = 3,
  which_df = "est"
)
res_tabs$sg10_out
Two-factor subgroups (maxk=2)
{er <= 0}
{size <= 35}
61
34
15
2.537
1.000
{er <= 0}
{nodes <= 7}
61
31
11
2.335
0.970
{er <= 0}
!{age <= 43}
68
38
14
2.164
0.970
{er <= 0}
82
45
16
1.951
0.970
{er <= 0}
!{size <= 20}
61
35
12
2.054
0.960
{er <= 0}
{pgr <= 32.5}
75
41
16
2.222
0.950
{er <= 0}
{pgr <= 7}
64
34
13
1.992
0.910
Search Configuration: Single-factor candidates (L) = 32; Maximum combinations evaluated = 528; Search depth (maxk) = 2
Search Results: Candidate subgroups found = 13; Maximum HR estimate = 2.54
Note: E1 = events in treatment arm; Pcons = consistency proportion
Code
Training data estimates
ITT
686 (100.0%)
246 (35.9%)
299 (43.6%)
66.3
50.2
7.8
0.69 (0.54, 0.89)
Questionable
61 (8.9%)
23 (37.7%)
34 (55.7%)
18.5
48
-19
2.54 (1.25, 5.17)
Recommend
625 (91.1%)
223 (35.7%)
265 (42.4%)
66.7
52.2
9.6
0.61 (0.47, 0.79)
Bootstrap Inference
Code
# output_dir <- "dev/vignettes-working/applications/gbsg/results"
# No trailing slash: file.path() adds the separator itself (a trailing slash
# produced "results//..."), and directory names with a trailing slash are not
# reliably detected on Windows.
output_dir <- "results"
save_results <- dir.exists(output_dir)

# File prefixes for saving
fileout_boot <- "gbsg-k2_v3_hr_B=1000"
fileout_cv <- "gbsg-k2_v3_hr_CV=200"

# patchwork is needed for the combined bootstrap plot; if it is not
# available that plot will not be produced. Attach it only when installed so
# a missing optional package does not stop the analysis.
if (requireNamespace("patchwork", quietly = TRUE)) {
  library(patchwork)
}

# Number of bootstrap samples
NB <- 1000

# Bootstrap inference for the ForestSearch fit `fs` (timed)
system.time({
  fs_bc <- forestsearch_bootstrap_dofuture(
    fs.est = fs,
    nb_boots = NB,
    show_three = FALSE,
    details = TRUE
  )
})
Ystar matrix generated should be 'boots x N': 1000 x 686
ForestSearch parameters for bootstrap iterations:
- sg_focus: hr
- maxk: 2
- fs.splits: 1000
- max_subgroups_search: 30
- hr.threshold: 1.25
- hr.consistency: 1
- pconsistency.threshold: 0.9
- n.min: 60
- use_twostage: TRUE
- use_lasso: TRUE
- use_grf: TRUE
Bootstrap-specific overrides:
- grf_res: NULL (forces re-selection)
- grf_cuts: NULL (forces re-selection)
- parallel_args: sequential (prevents nested parallelism)
- details: FALSE (suppressed in workers)
- plot.sg: FALSE
- plot.grf: FALSE
=== Bootstrap Analysis Complete ===
Success rate: 87.2% (872/1000)
H (Questionable) Estimates:
Unadjusted: 2.54 (1.25,5.17)
Bias-corrected: 1.89 (0.87,4.12)
Hc (Recommend) Estimates:
Unadjusted: 0.61 (0.47,0.79)
Bias-corrected: 0.63 (0.44,0.9)
===================================
user system elapsed
9665.319 158.408 773.611
Code
# Return to a sequential plan once the bootstrap has finished
plan("sequential")

# Persist the data and fits only when the output directory was found
if (save_results) {
  filename <- file.path(output_dir, paste0(fileout_boot, ".RData"))
  save(df.analysis, fs, fs_bc, file = filename)
  cat(" \n Results saved to:", filename, " \n ")
}
Results saved to: results//gbsg-k2_v3_hr_B=1000.RData
Diagnostics and Summaries
Code
# load("~/Documents/GitHub/forestsearch/vignettes/results/sim_gbsg_example_B=1000.RData")
# No trailing slash: file.path() supplies the separator, and directory names
# with a trailing slash are not reliably detected on Windows.
output_dir <- "results"
load_results <- dir.exists(output_dir)
if (load_results) {
  filename <- file.path(output_dir, paste0(fileout_boot, ".RData"))
  # Reload the saved bootstrap results only if the file is really there;
  # otherwise keep the objects already in memory instead of failing in load().
  if (file.exists(filename)) {
    load(file = filename)
  } else {
    message("Saved results not found, using objects in memory: ", filename)
  }
}

# Summarise the bootstrap results for the identified subgroup on the HR scale
summaries <- summarize_bootstrap_results(
  sgharm = fs$sg.harm,
  boot_results = fs_bc,
  create_plots = TRUE,
  est.scale = "hr"
)
===============================================================
BOOTSTRAP ANALYSIS SUMMARY
===============================================================
BOOTSTRAP SUCCESS METRICS:
-------------------------------------------------------------
Total iterations: 1000
Successful subgroup ID: 872 (87.2%)
Failed to find subgroup: 128 (12.8%)
TIMING ANALYSIS:
-------------------------------------------------------------
Overall:
Total bootstrap time: 12.87 minutes (0.21 hours)
Average per iteration: 0.01 min (0.8 sec)
Per-iteration timing:
Mean: 0.16 min (9.9 sec)
Median: 0.15 min (9.2 sec)
Std Dev: 0.10 minutes
Range: [0.01, 0.59] minutes
IQR: [0.09, 0.23] minutes
ForestSearch timing (successful iterations only):
Iterations with FS: 1000 (100.0%)
Mean FS time: 0.16 min (9.9 sec)
Median FS time: 0.15 min (9.2 sec)
Total FS time: 164.74 minutes
FS time % of total: 1280.5%
Overhead timing (Cox models, bias correction, etc.):
Mean overhead: 0.00 min (0.0 sec)
Median overhead: 0.00 min (0.0 sec)
Total overhead: 0.19 minutes
Overhead % of total: 1.5%
PERFORMANCE ASSESSMENT:
-------------------------------------------------------------
Performance rating: ✓✓✓ Excellent
Average iteration speed: 0.8 seconds
===============================================================
Code
# Extract the table of bootstrap bias-corrected estimates from the summary
# object and display it
sg_tab <- summaries$ table
sg_tab
Bootstrap bias-corrected estimates (1000 iterations)
N
NT
Events
MedT
MedC
RMSTd
HR (95% CI)†
HR‡ (95% CI)
Qstnbl
61 (8.9%)
23 (37.7%)
34 (55.7%)
18.5
48
-19
2.54 (1.25, 5.17)
1.89 (0.87,4.12)
Recmnd
625 (91.1%)
223 (35.7%)
265 (42.4%)
66.7
52.2
9.6
0.61 (0.47, 0.79)
0.63 (0.44,0.9)
Note : Med = Median survival time (months). RMSTd = Restricted mean survival time difference. Subgroup identified in 87.2% of bootstrap samples.
Code
# Summarise bootstrap event counts against a threshold of 12 events per arm
event_summary <- summarize_bootstrap_events (fs_bc, threshold = 12 )
=== Bootstrap Event Count Summary ===
Total bootstrap iterations: 1000
Event threshold: <12 events
ORIGINAL Subgroup H on BOOTSTRAP samples:
Control arm <12 events: 0 (0.0%)
Treatment arm <12 events: 0 (0.0%)
Either arm <12 events: 0 (0.0%)
ORIGINAL Subgroup Hc on BOOTSTRAP samples:
Control arm <12 events: 0 (0.0%)
Treatment arm <12 events: 0 (0.0%)
Either arm <12 events: 0 (0.0%)
NEW Subgroups found: 872 (87.2%)
NEW Subgroup H* on ORIGINAL data:
Control arm <12 events: 29 (3.3% of successful)
Treatment arm <12 events: 68 (7.8% of successful)
Either arm <12 events: 94 (10.8% of successful)
NEW Subgroup Hc* on ORIGINAL data:
Control arm <12 events: 0 (0.0% of successful)
Treatment arm <12 events: 0 (0.0% of successful)
Either arm <12 events: 0 (0.0% of successful)
Code
# Display the bootstrap diagnostics table stored in the summary object
summaries$ diagnostics_table_gt
Analysis of 1000 bootstrap iterations
Success Rate
Total iterations
1000
Successful subgroup ID
872 (87.2%)
Failed to find subgroup
128 (12.8%)
Success rating
Good ✓✓
Subgroup H (Questionable)
Unadjusted estimate
2.54 (1.25, 5.17)
Bias-corrected estimate
1.89 (0.87, 4.12)
Bias correction impact
25.4%
CI width change
3.92 -> 3.26
Subgroup Hc (Recommend)
Unadjusted estimate
0.61 (0.47, 0.79)
Bias-corrected estimate
0.63 (0.44, 0.90)
Bias correction impact
3.2%
CI width change
0.32 -> 0.47
Bootstrap Quality: H
Valid iterations
872
Mean (SD)
0.64 (0.50)
Coefficient of variation
78.5%
Skewness
-0.16
Bootstrap Quality: Hc
Valid iterations
872
Mean (SD)
-0.47 (0.21)
Coefficient of variation
45.4%
Skewness
0.15
Search Performance
Mean max HR found
3.19 (1.27)
Mean factors evaluated
39.7
Mean combinations tried
838
Proportion at maxk
--
Interpretation Guide:
✓ Good stability : Subgroup is reliably identified in most bootstrap samples.
⚠ High variability : Bootstrap estimates are imprecise (CV >= 25%). Consider increasing nb_boots or sample size.
Code
# Agreement between the subgroups found in bootstrap samples and the
# originally identified subgroup
summaries$ subgroup_summary$ original_agreement
Metric Value
<char> <char>
1: Total bootstrap iterations 1000
2: Successful iterations 872
3: Failed iterations (no subgroup) 128
4: Exact match with original 136 (15.6%)
5: Different from original 736 (84.4%)
Code
# Count and percent of bootstrap subgroups in which each factor appears
summaries$ subgroup_summary$ factor_presence
Rank Factor Count Percent
2 1 er 475 54.472477
6 2 pgr 384 44.036697
7 3 size 262 30.045872
1 4 age 231 26.490826
3 5 grade3 154 17.660550
5 6 nodes 146 16.743119
4 7 meno 67 7.683486
Code
# Same presence summary, broken down by specific factor definition (cut)
summaries$ subgroup_summary$ factor_presence_specific
Rank Base_Factor Factor_Definition Count Percent
124 1 er {er <= 0} 288 33.02752
147 2 grade3 {grade3} 152 17.43119
Forest Search n-fold cross-validation
Code
# No trailing slash: file.path() supplies the separator, and directory names
# with a trailing slash are not reliably detected on Windows.
output_dir <- "results"
load_results <- dir.exists(output_dir)
if (load_results) {
  filename <- file.path(output_dir, paste0(fileout_boot, ".RData"))
  # Reload the saved bootstrap results only if the file is really there;
  # otherwise keep the objects already in memory instead of failing in load().
  if (file.exists(filename)) {
    load(file = filename)
  } else {
    message("Saved results not found, using objects in memory: ", filename)
  }
}

# Kfolds = n (default to n-fold cross-validations)
# Reset first so a stale fs_OOB is never mistaken for a fresh result
fs_OOB <- NULL
fs_OOB <- forestsearch_Kfold(
  fs.est = fs,
  details = TRUE,
  parallel_args = list(plan = "callr", workers = 36, show_message = TRUE)
)
Cross-validation setup:
- Observations: 686
- Folds: 686
- Fold sizes (range): 1-1
ForestSearch parameters for CV folds:
- sg_focus: hr
- maxk: 2
- fs.splits: 1000
- max_subgroups_search: 30
- hr.threshold: 1.25
- hr.consistency: 1
- pconsistency.threshold: 0.9
- n.min: 60
- use_twostage: TRUE
- use_lasso: TRUE
- use_grf: TRUE
- (per-fold parallel: sequential)
- (per-fold details: FALSE)
- (per-fold plot.sg: FALSE)
Cross-validation complete:
- Time: 4.82 minutes
- Subgroup found in 97.8 % of folds
Any found: 0.9781341
Exact match: 0.8921283
At least 1 match: 0.9781341
Cov 1 any: 0.9781341
Cov 2 any: 0.8921283
Cov 1 and 2 any: 0.8921283
Cov 1 exact: 0.9781341
Cov 2 exact: 0.8921283
Agreement (sens, ppv) in H and Hc: 0.704918 0.9856 0.8269231 0.9716088
Code
# Reset workers to single
plan(sequential)

# Summarise the n-fold cross-validation results
summary_OOB <- forestsearch_KfoldOut(
  res = fs_OOB,
  details = TRUE,
  outall = TRUE
)
Any found: 0.9781341
Exact match: 0.8921283
At least 1 match: 0.9781341
Cov 1 any: 0.9781341
Cov 2 any: 0.8921283
Cov 1 and 2 any: 0.8921283
Cov 1 exact: 0.9781341
Cov 2 exact: 0.8921283
Agreement (sens, ppv) in H and Hc: 0.704918 0.9856 0.8269231 0.9716088
Subgroup n n1 m1 m0 RMST
Overall "ITT" "686 (100.0%)" "246 (35.9%)" "66.3" "50.2" "7.8"
FA_0 "Not recommend" "61 (8.9%)" "23 (37.7%)" "18.5" "48" "-19"
KfA_0 "Not recommend" "52 (7.6%)" "14 (26.9%)" NA "42.9" "9.3"
FA_1 "Recommend" "625 (91.1%)" "223 (35.7%)" "66.7" "52.2" "9.6"
KfA_1 "Recommend" "634 (92.4%)" "232 (36.6%)" "66.3" "55" "6.7"
Hazard ratio
Overall "0.69 (0.54, 0.89)"
FA_0 "2.54 (1.25, 5.17)"
KfA_0 "0.60 (0.22, 1.64)"
FA_1 "0.61 (0.47, 0.79)"
KfA_1 "0.71 (0.55, 0.91)"
Code
# Frequency of each value in the first column of SGs_found
table (summary_OOB$ SGs_found[,1 ])
!{age <= 43} {er <= 0}
5 666
Code
# Frequency of each value in the second column of SGs_found
table (summary_OOB$ SGs_found[,2 ])
!{age <= 43} {er <= 0} {nodes <= 7} {pgr <= 32} {pgr <= 33} {pgr <= 7}
3 5 3 4 36 1
{size <= 35}
612
Code
# Repeated 10-fold cross-validation: Ksims repetitions of Kfolds = 10
Ksims <- 200
fs_ten <- forestsearch_tenfold(
  fs.est = fs,
  sims = Ksims,
  Kfolds = 10,
  details = TRUE,
  parallel_args = list(plan = "callr", workers = 36, show_message = TRUE)
)
Starting repeated K-fold cross-validation:
- Simulations: 200
- Folds per simulation: 10
- Workers: 13
ForestSearch parameters for CV folds:
- sg_focus: hr
- maxk: 2
- fs.splits: 1000
- max_subgroups_search: 30
- hr.threshold: 1.25
- hr.consistency: 1
- pconsistency.threshold: 0.9
- n.min: 60
- use_twostage: TRUE
- use_lasso: TRUE
- use_grf: TRUE
- (per-fold parallel: sequential)
- (per-fold details: FALSE)
- (per-fold plot.sg: FALSE)
Repeated K-fold CV complete:
- Time: 12.39 minutes
- Successful simulations: 200 / 200
- Projected hours per 100 sims: 0.1
Code
# Reset workers to single
plan(sequential)

# Summary of the subgroup-finding metrics from the repeated K-fold run
print(fs_ten$find_summary)
Any Exact At least 1 Cov1 Cov2 Cov 1 & 2 Cov1 exact
0.7 0.0 0.5 0.5 0.0 0.0 0.5
Cov2 exact
0.0
Code
# Summary of the sens/ppv agreement metrics for H and Hc
print (fs_ten$ sens_summary)
sens_H sens_Hc ppv_H ppv_Hc
0.4918033 0.9664000 0.5763403 0.9506173
Code
# First rows of the sens/ppv metrics stored in sens_out
print (head (fs_ten$ sens_out))
sens_H sens_Hc ppv_H ppv_Hc
[1,] 0.4590164 0.9680 0.5833333 0.9482759
[2,] 0.5409836 0.9632 0.5892857 0.9555556
[3,] 0.6229508 0.9424 0.5135135 0.9624183
[4,] 0.5245902 0.9600 0.5614035 0.9538951
[5,] 0.3442623 0.9792 0.6176471 0.9386503
[6,] 0.4098361 0.9632 0.5208333 0.9435737
Code
# First rows of the subgroup-finding metrics stored in find_out
print (head (fs_ten$ find_out))
Any Exact At least 1 Cov1 Cov2 Cov 1 & 2 Cov1 exact Cov2 exact
[1,] 0.6 0 0.5 0.5 0 0 0.5 0
[2,] 0.7 0 0.5 0.5 0 0 0.5 0
[3,] 0.9 0 0.6 0.6 0 0 0.6 0
[4,] 0.7 0 0.5 0.5 0 0 0.5 0
[5,] 0.5 0 0.4 0.4 0 0 0.4 0
[6,] 0.7 0 0.4 0.4 0 0 0.4 0
Code
# Save all results
# No trailing slash: file.path() supplies the separator (a trailing slash
# produced "results//..."), and directory names with a trailing slash are not
# reliably detected on Windows.
output_dir <- "results"
save_results <- dir.exists(output_dir)
if (save_results) {
  filename <- file.path(output_dir, paste0(fileout_cv, ".RData"))
  save(df.analysis, fs, fs_bc, fs_ten, fs_OOB, file = filename)
  cat(" \n Results saved to:", filename, " \n ")
}
Results saved to: results//gbsg-k2_v3_hr_CV=200.RData
Code
# No trailing slash: file.path() supplies the separator, and directory names
# with a trailing slash are not reliably detected on Windows.
output_dir <- "results"
load_results <- dir.exists(output_dir)
if (load_results) {
  filename <- file.path(output_dir, paste0(fileout_cv, ".RData"))
  # Reload the saved cross-validation results only if the file is really
  # there; otherwise keep the objects already in memory instead of failing.
  if (file.exists(filename)) {
    load(file = filename)
  } else {
    message("Saved results not found, using objects in memory: ", filename)
  }
}
# Define subgroups to display: each entry is a "reference" subgroup whose
# display name is the same text as its subset expression
subgroups <- lapply(
  c(
    age_gt65 = "age > 65",
    age_lt65 = "age <= 65",
    pgr_positive = "pgr > 0",
    pgr_negative = "pgr <= 0"
  ),
  function(expr) {
    list(subset_expr = expr, name = expr, type = "reference")
  }
)
# Create the forest plot from the main fit, bootstrap and cross-validation
# results together with the reference subgroups
fs_results_all <- list(
  fs.est = fs,
  fs_bc = fs_bc,
  fs_OOB = fs_OOB,
  fs_kfold = fs_ten
)
result <- plot_subgroup_results_forestplot(
  fs_results = fs_results_all,
  df_analysis = df.analysis,
  subgroup_list = subgroups,
  outcome.name = "time_months",
  event.name = "status",
  treat.name = "hormon",
  E.name = "Hormon",
  C.name = "CT",
  ci_column_spaces = 25
)

# Display the plot
plot(result$plot)